% Example 1

% The following example demonstrates the perceptron learning law.
% Andrew P. Paplinski, Monash University, Australia
% Last modified:  17 March 2000

% diary perc.d
% echo on
clear

p = 5; % dimensionality of the augmented input space (4 features + bias)
N = 50; % number of training patterns - size of the training epoch

% PART 1: Generation of the training and validation sets.
% 2N points are generated; the first N become the training set and
% the remaining N the validation set.

X = 2*rand(p-1, 2*N)-1; % a p-1 by 2N matrix of uniformly
       % distributed random numbers from the interval [-1,+1]

nn = round((2*N-1)*rand(N,1))+1; % generation of N random
       % integer numbers from the range [1..2N].
       % Repetitions are possible.

X(:,nn) = sin(X(:,nn)); % Columns of the matrix X pointed to by nn
         % are "coloured" with the function `sin', in order to
         % make the training patterns more interesting.

X = [X; ones(1,2*N)]; % Each input vector is appended with a
                      % constant 1 (an extra row of ones) to
                      % implement biasing.

wht = 3*rand(1,p)-1; wht = wht/norm(wht); % This is a unit-length
         % vector orthogonal to the augmented separation
         % plane. It is also the target weight vector the
         % perceptron should learn.
wht  % no semicolon: display the target weight vector

D = (wht*X >= 0); % Classification of every point of the augmented
        % input space with respect to the class number, 0 or 1:
        % the class is decided by the side of the target plane.

Xv = X(:, N+1:2*N) ;   % The validation set is: Xv  p by N
Dv = D(:, N+1:2*N) ;   %                        Dv  1 by N

X  = X(:, 1:N) ;       % The training set is: X  p by N
D  = D(:, 1:N) ;       %                      D  1 by N
% [X; D]

% Visualisation of the input-output patterns.
% The input space is p-dimensional, hence difficult to visualise.
% We therefore plot the projection of the input patterns onto a 2-D
% plane, here the (x_1, x_3) plane, by extracting rows  pr  of X.

pr = [1, 3];
Xp = X(pr, :);          % projected input patterns, 2 by N
wp = wht([pr p]);       % matching projection of the target weights
                        % (the bias component p is always kept)

% c0 and c1 point at the training patterns belonging to class 0
% and class 1, respectively.
c0 = find(D == 0);
c1 = find(D == 1);

figure(1), clf reset

% Patterns of class 0 are marked with 'o', those of class 1 with 'x'.
plot(Xp(1,c0), Xp(2,c0), 'o', Xp(1,c1), Xp(2,c1), 'x')

axis(axis), hold on     % freeze the axes and the current plot contents

% Superimpose the projection of the separation plane on the plot.
% The projection is a straight line obeying  wp . x = 0 ; four points
% lying on this line are computed from that equation.

L = [-1 1] ;
S = [ -(wp(2)*L + wp(3))/wp(1)     % x_1 values where x_3 = L
      -(wp(1)*L + wp(3))/wp(2) ];  % x_3 values where x_1 = L
plot([S(1,:) L], [L S(2,:)]), grid, drawnow

% PART 2: Learning
%
% The training input-output patterns are stored in X (p by N) and
% D (1 by N). Starting from a randomly selected weight vector (hence
% a random decision plane), the training procedure should make the
% weight vector converge to one specifying the correct separation
% plane.

eta = 0.5;              % the training gain (learning rate)

wh = 2*rand(1,p)-1;     % random initial weight vector, uniform in
                        % [-1, +1]

% The projection of the initial decision plane, which is orthogonal
% to wh, is plotted exactly as before:
wp = wh([pr p]);
S = [ -(wp(2)*L + wp(3))/wp(1)
      -(wp(1)*L + wp(3))/wp(2) ];
plot([S(1,:) L], [L S(2,:)]), grid on, drawnow

% In what follows, the internal loop controlled by n goes once
% through the N training exemplars (one epoch). Epochs are repeated
% until the performance index (error) E is small, but not more than
% C times. After each epoch the projection of the current decision
% surface is re-plotted, the previous projection having been erased.

C  = 50;                % maximum number of training epochs
E  = [C+1, zeros(1,C)]; % per-epoch totals of absolute errors;
                        % E(1) = C+1 is a sentinel that guarantees
                        % the training loop is entered
WW = zeros(C*N, p);     % WW stores the normalised weight vector wh,
                        % one row per weight update, for later plotting
c  = 1;                 % epoch counter
cw = 0;                 % total counter of weight updates

% Epoch loop: repeat while the previous epoch's error E(c) exceeds 1
% (E(1) is a sentinel so the first epoch always runs), but never for
% more than C epochs. The "c <= C" guard enforces the limit promised
% above and prevents E and WW from being indexed out of bounds when
% convergence is slow. Scalar short-circuit || replaces the
% elementwise | .
while (E(c) > 1 || c == 1) && c <= C
   c = c+1;
   plot([S(1,:) L], [L S(2,:)], 'w'), drawnow
                    % At the beginning of each epoch the former
                    % projection of the decision plane is erased by
                    % re-plotting it in white ('w')

   for n = 1:N    % The internal loop goes once through all
                    % training exemplars.
      err = D(n) - ((wh*X(:,n)) >= 0); % err(n) = d(n) - y(n)
                    % (renamed from "eps", which shadows the MATLAB
                    % built-in machine-precision constant)
      wh  = wh + eta*err*X(:,n)'; % The Perceptron Learning Law
      cw = cw + 1;
      WW(cw, :) = wh/norm(wh); % The updated and normalised weight
                     % vector is stored in WW for future plotting

      E(c) = E(c) + abs(err) ;  % |err| = err^2 since err is -1, 0, 1
   end;
   wp = wh([pr p]);   % projection of the current weight vector
   S = -diag([1 1]./wp(1:2))*(wp([2,1])'*L +wp(3)) ;
   plot([S(1,:) L], [L S(2,:)], 'g'), drawnow
end;

% After every pass through the set of training patterns, the
% projection of the current decision plane (determined by the current
% weight vector) was plotted after the previous projection had been
% erased.

WW = WW(1:cw, pr);  % keep only the cw rows actually filled during
                    % training, and only the projected coordinates pr
E = E(2:c)          % per-epoch error totals (E(1) was only a sentinel).
                    % The upper bound is c, not c+1: E(c) is the last
                    % epoch actually accumulated, so the former
                    % E(2:c+1) appended a spurious unfilled zero (and
                    % would overrun E when training stops at the epoch
                    % limit). No semicolon: display the error history.
